//version 16.0
* The version statement above is commented out, so the script runs under the
* version of whichever Stata executes it.
* Start from a clean session.
clear all
* Do not pause output at more prompts.
set more off
* Close a log left open by an earlier run; capture ignores the error if none is open.
capture log close

/* Description:

  This file produces the US figures for:
	*Figure 1b: sons and daughters pooled on combined (mean) parental income
	*Figure 2b: sons and daughters separately on combined (mean) parental income
	*Figure 3b: sons and daughters pooled on father's and mother's income
	*Figure 4b: sons and daughters separately on father's income
	*Figure 4d: sons and daughters separately on mother's income
*/

* Log all output of this script. The path is quoted so that a results
* directory containing spaces does not break the command.
log using "${us_results}/figures_trends.log", replace
************************************
*IRP estimates for figures 1-5
************************************
* Load only the variables matched by the patterns below, for years 1985 onwards.
use *newid *LAB* *cohort *LABYR* *MF* year female if year>=1985 ///
	using "${projdata}/analysis-sample-main.dta", clear
/* Rename US vars to match Swedish varnames */
rename (newid female cohort pLAB  m_cohort f_cohort m_LABYR f_LABYR m_LAB f_LAB pm_LAB pf_LAB pp_MFAVG) ///
       (idnr  woman  yob    pearn myob     fyob     mobsy   fobsy   mearn fearn pmearn pfearn pfmearn)
/* End US variable renames */
* Age terms used as controls:
*   agec1 = year minus birth cohort minus 40
*   agef1 / agem1 = f_LABYR / m_LABYR minus the parent's cohort
*           (presumably the parent's age when income was measured - confirm)
*   fmage1 = row mean of agef1 and agem1 (rowmean ignores a missing parent)
g agec1=year-yob-40
g agef1=fobsy-fyob
g agem1=mobsy-myob
egen fmage1=rowmean(agef1 agem1)
* maxage1 = father's age, replaced by mother's age when father's earnings are
* missing or when mother's earnings exceed father's.
* NOTE(review): maxage* is not referenced by any regression in this file.
g maxage1=agef1
replace maxage1=agem1 if missing(fearn) & !missing(mearn)
replace maxage1=agem1 if mearn>fearn & !missing(mearn) & !missing(fearn)
* Quadratic, cubic and quartic terms for every age measure.
forval i=2/4{
	g agec`i'=agec1^`i'
	g agef`i'=agef1^`i'
	g agem`i'=agem1^`i'
	g fmage`i'=fmage1^`i'
	g maxage`i'=maxage1^`i'
}
*Figure 1: sons and daughters pooled on combined (mean) parental income
* "1" is the internal label of this estimate set; results are stored in the
* matrices b1 (yearly slopes), se1 (standard errors) and var1 (variances).
global fig "1"
* Dependent variable
global inc "pearn"
* Parental income measure interacted with year
global incp "pfmearn"
* Stub of the parental-age polynomial controls
global agep "fmage"
* Names of the result matrices
global beta b$fig
global se se$fig
global var var$fig

* Year-specific slopes on parental income, controlling for year effects,
* parental-age and child-age polynomials and child-age-by-income interactions.
* Standard errors are clustered on idnr.
regress $inc i.year#c.$incp i.year $agep* agec* c.agec*#c.$incp, vce(cluster idnr)

* levelsof returns the number of distinct years in r(r); store it right away.
levelsof year, local(years)
local n_years = r(r)
* The year-by-income slopes are the first terms of the model, so they occupy
* the first n_years rows of the transposed coefficient vector.
matrix $beta = e(b)'
matrix $beta = $beta[1..`n_years',....]

* Both accumulators start with a placeholder row that is removed after the loop.
matrix $se = J(1,1,0)
matrix $var = J(1,2,0)
foreach yr of local years {
	* Variances of outcome and parental income within the estimation sample
	summarize $inc if e(sample) & year==`yr'
	scalar v_out = r(Var)
	summarize $incp if e(sample) & year==`yr'
	scalar v_par = r(Var)
	matrix $var = $var \ v_out, v_par
	* Standard error of the slope for this year
	scalar se_yr = _se[`yr'.year#c.$incp]
	matrix $se = $se \ se_yr
}
* Drop the placeholder rows.
matrix $se = $se[2...,....]
matrix $var = $var[2...,....]
*Figure 2: sons and daughters separately on father's income
*Sons
* "2s" is the internal label of this estimate set; results are stored in the
* matrices b2s (yearly slopes), se2s (standard errors) and var2s (variances).
global fig "2s"
* Dependent variable
global inc "pearn"
* Parental income measure interacted with year
global incp "pfearn"
* Stub of the parental-age polynomial controls
global agep "agef"
* Names of the result matrices
global beta b$fig
global se se$fig
global var var$fig

* Same specification as the pooled model, restricted to woman==0.
regress $inc i.year#c.$incp i.year $agep* agec* c.agec*#c.$incp if woman==0, vce(cluster idnr)

* levelsof returns the number of distinct years in r(r); store it right away.
levelsof year, local(years)
local n_years = r(r)
* The year-by-income slopes occupy the first n_years rows of the coefficient vector.
matrix $beta = e(b)'
matrix $beta = $beta[1..`n_years',....]

* Both accumulators start with a placeholder row that is removed after the loop.
matrix $se = J(1,1,0)
matrix $var = J(1,2,0)
foreach yr of local years {
	* Variances of outcome and parental income within the estimation sample
	summarize $inc if e(sample) & year==`yr'
	scalar v_out = r(Var)
	summarize $incp if e(sample) & year==`yr'
	scalar v_par = r(Var)
	matrix $var = $var \ v_out, v_par
	* Standard error of the slope for this year
	scalar se_yr = _se[`yr'.year#c.$incp]
	matrix $se = $se \ se_yr
}
* Drop the placeholder rows.
matrix $se = $se[2...,....]
matrix $var = $var[2...,....]
*Daughters
* "2d" is the internal label of this estimate set (matrices b2d, se2d, var2d).
* The globals inc, incp and agep are not reassigned here; they keep the values
* set for the sons estimate directly above.
global fig "2d"
* Names of the result matrices
global beta b$fig
global se se$fig
global var var$fig

* Same specification as for sons, restricted to woman==1.
regress $inc i.year#c.$incp i.year $agep* agec* c.agec*#c.$incp if woman==1, vce(cluster idnr)

* levelsof returns the number of distinct years in r(r); store it right away.
levelsof year, local(years)
local n_years = r(r)
* The year-by-income slopes occupy the first n_years rows of the coefficient vector.
matrix $beta = e(b)'
matrix $beta = $beta[1..`n_years',....]

* Both accumulators start with a placeholder row that is removed after the loop.
matrix $se = J(1,1,0)
matrix $var = J(1,2,0)
foreach yr of local years {
	* Variances of outcome and parental income within the estimation sample
	summarize $inc if e(sample) & year==`yr'
	scalar v_out = r(Var)
	summarize $incp if e(sample) & year==`yr'
	scalar v_par = r(Var)
	matrix $var = $var \ v_out, v_par
	* Standard error of the slope for this year
	scalar se_yr = _se[`yr'.year#c.$incp]
	matrix $se = $se \ se_yr
}
* Drop the placeholder rows.
matrix $se = $se[2...,....]
matrix $var = $var[2...,....]
*Figure 3: sons and daughters separately on mother's income
*Sons
* "3s" is the internal label of this estimate set; results are stored in the
* matrices b3s (yearly slopes), se3s (standard errors) and var3s (variances).
global fig "3s"
* Dependent variable
global inc "pearn"
* Parental income measure interacted with year
global incp "pmearn"
* Stub of the parental-age polynomial controls
global agep "agem"
* Names of the result matrices
global beta b$fig
global se se$fig
global var var$fig

* Same specification as the pooled model, restricted to woman==0.
regress $inc i.year#c.$incp i.year $agep* agec* c.agec*#c.$incp if woman==0, vce(cluster idnr)

* levelsof returns the number of distinct years in r(r); store it right away.
levelsof year, local(years)
local n_years = r(r)
* The year-by-income slopes occupy the first n_years rows of the coefficient vector.
matrix $beta = e(b)'
matrix $beta = $beta[1..`n_years',....]

* Both accumulators start with a placeholder row that is removed after the loop.
matrix $se = J(1,1,0)
matrix $var = J(1,2,0)
foreach yr of local years {
	* Variances of outcome and parental income within the estimation sample
	summarize $inc if e(sample) & year==`yr'
	scalar v_out = r(Var)
	summarize $incp if e(sample) & year==`yr'
	scalar v_par = r(Var)
	matrix $var = $var \ v_out, v_par
	* Standard error of the slope for this year
	scalar se_yr = _se[`yr'.year#c.$incp]
	matrix $se = $se \ se_yr
}
* Drop the placeholder rows.
matrix $se = $se[2...,....]
matrix $var = $var[2...,....]
*Daughters
* "3d" is the internal label of this estimate set (matrices b3d, se3d, var3d).
* The globals inc, incp and agep are not reassigned here; they keep the values
* set for the sons estimate directly above.
global fig "3d"
global beta b$fig /*matrix to store estimates*/
global se se$fig /*matrix to store standard errors*/
global var var$fig /*matrix to store variance of income measures*/
* Same specification as for sons, restricted to woman==1; SEs clustered on idnr.
reg $inc i.year#c.$incp i.year $agep* agec* c.agec*#c.$incp if woman==1, cluster(idnr)
* Fixed: the list was stored in local "year" while the loop below reads
* "years", so the loop silently relied on the list left by the previous block.
levelsof year, local(years)
* Keep the first r(r) rows: the year-by-income slopes are the first model terms.
mat $beta=e(b)'
mat $beta=$beta[1..r(r),....]
* Both accumulators start with a placeholder row that is removed after the loop.
mat $se=(0)
mat $var=(0,0)
foreach i of local years{
	* Standard error of the slope for this year
	sca t1=_se[`i'.year#c.$incp]
	mat $se=$se\t1
	* Variances of outcome and parental income within the estimation sample
	sum $inc if year==`i' & e(sample)==1
	sca t2=r(Var)
	sum $incp if year==`i' & e(sample)==1
	sca t3=r(Var)
	mat $var=$var\t2,t3
}
* Drop the placeholder rows.
mat $se=$se[2...,....]
mat $var=$var[2...,....]
*Figure 4: sons and daughters separately on combined (mean) parental income
*Sons
* "4s" is the internal label of this estimate set; results are stored in the
* matrices b4s (yearly slopes), se4s (standard errors) and var4s (variances).
global fig "4s"
* Dependent variable
global inc "pearn"
* Parental income measure interacted with year
global incp "pfmearn"
* Stub of the parental-age polynomial controls
global agep "fmage"
* Names of the result matrices
global beta b$fig
global se se$fig
global var var$fig

* Same specification as the pooled model, restricted to woman==0.
regress $inc i.year#c.$incp i.year $agep* agec* c.agec*#c.$incp if woman==0, vce(cluster idnr)

* levelsof returns the number of distinct years in r(r); store it right away.
levelsof year, local(years)
local n_years = r(r)
* The year-by-income slopes occupy the first n_years rows of the coefficient vector.
matrix $beta = e(b)'
matrix $beta = $beta[1..`n_years',....]

* Both accumulators start with a placeholder row that is removed after the loop.
matrix $se = J(1,1,0)
matrix $var = J(1,2,0)
foreach yr of local years {
	* Variances of outcome and parental income within the estimation sample
	summarize $inc if e(sample) & year==`yr'
	scalar v_out = r(Var)
	summarize $incp if e(sample) & year==`yr'
	scalar v_par = r(Var)
	matrix $var = $var \ v_out, v_par
	* Standard error of the slope for this year
	scalar se_yr = _se[`yr'.year#c.$incp]
	matrix $se = $se \ se_yr
}
* Drop the placeholder rows.
matrix $se = $se[2...,....]
matrix $var = $var[2...,....]
*Daughters
* "4d" is the internal label of this estimate set (matrices b4d, se4d, var4d).
* The globals inc, incp and agep are not reassigned here; they keep the values
* set for the sons estimate directly above.
global fig "4d"
* Names of the result matrices
global beta b$fig
global se se$fig
global var var$fig

* Same specification as for sons, restricted to woman==1.
regress $inc i.year#c.$incp i.year $agep* agec* c.agec*#c.$incp if woman==1, vce(cluster idnr)

* levelsof returns the number of distinct years in r(r); store it right away.
levelsof year, local(years)
local n_years = r(r)
* The year-by-income slopes occupy the first n_years rows of the coefficient vector.
matrix $beta = e(b)'
matrix $beta = $beta[1..`n_years',....]

* Both accumulators start with a placeholder row that is removed after the loop.
matrix $se = J(1,1,0)
matrix $var = J(1,2,0)
foreach yr of local years {
	* Variances of outcome and parental income within the estimation sample
	summarize $inc if e(sample) & year==`yr'
	scalar v_out = r(Var)
	summarize $incp if e(sample) & year==`yr'
	scalar v_par = r(Var)
	matrix $var = $var \ v_out, v_par
	* Standard error of the slope for this year
	scalar se_yr = _se[`yr'.year#c.$incp]
	matrix $se = $se \ se_yr
}
* Drop the placeholder rows.
matrix $se = $se[2...,....]
matrix $var = $var[2...,....]
*Figure 5: sons and daughters pooled on father's and mother's income
*Fathers
* "5f" is the internal label of this estimate set; results are stored in the
* matrices b5f (yearly slopes), se5f (standard errors) and var5f (variances).
global fig "5f"
* Dependent variable
global inc "pearn"
* Parental income measure interacted with year
global incp "pfearn"
* Stub of the parental-age polynomial controls
global agep "agef"
* Names of the result matrices
global beta b$fig
global se se$fig
global var var$fig

* Pooled specification (no restriction on woman); SEs clustered on idnr.
regress $inc i.year#c.$incp i.year $agep* agec* c.agec*#c.$incp, vce(cluster idnr)

* levelsof returns the number of distinct years in r(r); store it right away.
levelsof year, local(years)
local n_years = r(r)
* The year-by-income slopes occupy the first n_years rows of the coefficient vector.
matrix $beta = e(b)'
matrix $beta = $beta[1..`n_years',....]

* Both accumulators start with a placeholder row that is removed after the loop.
matrix $se = J(1,1,0)
matrix $var = J(1,2,0)
foreach yr of local years {
	* Variances of outcome and parental income within the estimation sample
	summarize $inc if e(sample) & year==`yr'
	scalar v_out = r(Var)
	summarize $incp if e(sample) & year==`yr'
	scalar v_par = r(Var)
	matrix $var = $var \ v_out, v_par
	* Standard error of the slope for this year
	scalar se_yr = _se[`yr'.year#c.$incp]
	matrix $se = $se \ se_yr
}
* Drop the placeholder rows.
matrix $se = $se[2...,....]
matrix $var = $var[2...,....]
*Mothers
* "5m" is the internal label of this estimate set; results are stored in the
* matrices b5m (yearly slopes), se5m (standard errors) and var5m (variances).
global fig "5m"
* Dependent variable
global inc "pearn"
* Parental income measure interacted with year
global incp "pmearn"
* Stub of the parental-age polynomial controls
global agep "agem"
* Names of the result matrices
global beta b$fig
global se se$fig
global var var$fig

* Pooled specification (no restriction on woman); SEs clustered on idnr.
regress $inc i.year#c.$incp i.year $agep* agec* c.agec*#c.$incp, vce(cluster idnr)

* levelsof returns the number of distinct years in r(r); store it right away.
levelsof year, local(years)
local n_years = r(r)
* The year-by-income slopes occupy the first n_years rows of the coefficient vector.
matrix $beta = e(b)'
matrix $beta = $beta[1..`n_years',....]

* Both accumulators start with a placeholder row that is removed after the loop.
matrix $se = J(1,1,0)
matrix $var = J(1,2,0)
foreach yr of local years {
	* Variances of outcome and parental income within the estimation sample
	summarize $inc if e(sample) & year==`yr'
	scalar v_out = r(Var)
	summarize $incp if e(sample) & year==`yr'
	scalar v_par = r(Var)
	matrix $var = $var \ v_out, v_par
	* Standard error of the slope for this year
	scalar se_yr = _se[`yr'.year#c.$incp]
	matrix $se = $se \ se_yr
}
* Drop the placeholder rows.
matrix $se = $se[2...,....]
matrix $var = $var[2...,....]

*Get number and list of years 
levelsof year, local(yearlist)
gl nyears: word count `yearlist'
di "Number of years=$nyears"
* Replace the microdata with an empty dataset holding one row per year.
drop _all
set obs $nyears
gen year=.
* Row k receives the k-th year of the list, so rows line up with the rows of
* the result matrices, which were filled in the same year order.
forvalues k = 1/$nyears {
	replace year = `:word `k' of `yearlist'' if _n==`k'
}
* One label per estimate set, shared by the point estimate, the first variance
* column and both interval bounds.
local lbl_1  "pooled/parental average"
local lbl_2s "son/father"
local lbl_2d "daughter/father"
local lbl_3s "son/mother"
local lbl_3d "daughter/mother"
local lbl_4s "son/parental average"
local lbl_4d "daughter/parental average"
local lbl_5f "pooled/father"
local lbl_5m "pooled/mother"
local est "1 2s 2d 3s 3d 4s 4d 5f 5m"
foreach e of local est{
	* svmat names each new variable as the matrix name plus the column number
	svmat b`e'
	svmat se`e'
	svmat var`e'
	* Upper and lower bound: estimate plus or minus 1.96 standard errors
	g sd`e'u=b`e'1+1.96*se`e'1
	g sd`e'l=b`e'1-1.96*se`e'1
	rename b`e'1 irp`e'
	lab var irp`e' "`lbl_`e''"
	lab var var`e'1 "`lbl_`e''"
	lab var sd`e'u "`lbl_`e''"
	lab var sd`e'l "`lbl_`e''"
}
compress
* Path quoted so that a results directory containing spaces does not break the save.
save "${us_results}/figures_us_irp.dta", replace

************************************
***Creating IRP figures 1-4
************************************

*Figure 1b: sons and daughters pooled on combined (mean) parental income
* Connected line of irp1 by year, with spikes running from sd1l to sd1u.
twoway ///
	(connected irp1 year, msymbol(o) lcolor(ebblue) mcolor(ebblue)) ///
	(rspike sd1u sd1l year, lcolor(ebblue)), ///
	ytitle("Rank persistence") xtitle("Year") ///
	graphregion(fcolor(white) color(white)) ///
	legend(order(1) label(1 "Pooled sons and daughters")) ///
	yscale(range(0 0.45)) ylabel(0(0.05)0.45) ///
	xscale(range(1985 2020)) xlabel(1985(5)2020) ///
	saving("${us_results}/fig1b", replace)
* Export the graph in both formats.
graph export "${tabfig}/fig1b.pdf", replace
graph export "${tabfig}/fig1b.eps", replace

*Figure 2b: sons and daughters separately on combined (mean) parental income
* Sons (irp4s) in blue and daughters (irp4d) in cranberry, each with spikes
* between its lower and upper bound; only the two lines appear in the legend.
twoway ///
	(connected irp4s year, msymbol(o) lcolor(ebblue) mcolor(ebblue)) ///
	(rspike sd4su sd4sl year, lcolor(ebblue)) ///
	(connected irp4d year, msymbol(t) lcolor(cranberry) mcolor(cranberry) lpattern(solid)) ///
	(rspike sd4du sd4dl year, lcolor(cranberry)), ///
	ytitle("Rank persistence") xtitle("Year") ///
	graphregion(fcolor(white) color(white)) ///
	legend(order(1 3) label(1 "Sons") label(3 "Daughters")) ///
	yscale(range(0 0.45)) ylabel(0(0.05)0.45) ///
	xscale(range(1985 2020)) xlabel(1985(5)2020) ///
	saving("${us_results}/fig2b", replace)
* Export the graph in both formats.
graph export "${tabfig}/fig2b.pdf", replace
graph export "${tabfig}/fig2b.eps", replace

*Figure 3b: sons and daughters pooled on father's and mother's income
* Fathers (irp5f) in blue and mothers (irp5m) in cranberry, each with spikes
* between its lower and upper bound; only the two lines appear in the legend.
twoway ///
	(connected irp5f year, msymbol(o) lcolor(ebblue) mcolor(ebblue)) ///
	(rspike sd5fu sd5fl year, lcolor(ebblue)) ///
	(connected irp5m year, msymbol(t) lcolor(cranberry) mcolor(cranberry) lpattern(solid)) ///
	(rspike sd5mu sd5ml year, lcolor(cranberry)), ///
	ytitle("Rank persistence") xtitle("Year") ///
	graphregion(fcolor(white) color(white)) ///
	legend(order(1 3) label(1 "Fathers") label(3 "Mothers")) ///
	yscale(range(0 0.45)) ylabel(0(0.05)0.45) ///
	xscale(range(1985 2020)) xlabel(1985(5)2020) ///
	saving("${us_results}/fig3b", replace)
* Export the graph in both formats.
graph export "${tabfig}/fig3b.pdf", replace
graph export "${tabfig}/fig3b.eps", replace

*Figure 4b: sons and daughters separately on father's income
* Sons (irp2s) in blue and daughters (irp2d) in cranberry, each with spikes
* between its lower and upper bound; only the two lines appear in the legend.
* NOTE(review): the graph file is saved as fig4a while the exports are named
* fig4b - confirm which name is intended before relying on either file.
twoway ///
	(connected irp2s year, msymbol(o) lcolor(ebblue) mcolor(ebblue)) ///
	(rspike sd2su sd2sl year, lcolor(ebblue)) ///
	(connected irp2d year, msymbol(t) lcolor(cranberry) mcolor(cranberry) lpattern(solid)) ///
	(rspike sd2du sd2dl year, lcolor(cranberry)), ///
	ytitle("Rank persistence") xtitle("Year") ///
	graphregion(fcolor(white) color(white)) ///
	legend(order(1 3) label(1 "Sons") label(3 "Daughters")) ///
	yscale(range(0 0.45)) ylabel(0(0.05)0.45) ///
	xscale(range(1985 2020)) xlabel(1985(5)2020) ///
	saving("${us_results}/fig4a", replace)
* Export the graph in both formats.
graph export "${tabfig}/fig4b.pdf", replace
graph export "${tabfig}/fig4b.eps", replace

*Figure 4d: sons and daughters separately on mother's income
* Sons (irp3s) in blue and daughters (irp3d) in cranberry, each with spikes
* between its lower and upper bound; only the two lines appear in the legend.
* NOTE(review): the graph file is saved as fig4b while the exports are named
* fig4d - confirm which name is intended before relying on either file.
twoway ///
	(connected irp3s year, msymbol(o) lcolor(ebblue) mcolor(ebblue)) ///
	(rspike sd3su sd3sl year, lcolor(ebblue)) ///
	(connected irp3d year, msymbol(t) lcolor(cranberry) mcolor(cranberry) lpattern(solid)) ///
	(rspike sd3du sd3dl year, lcolor(cranberry)), ///
	ytitle("Rank persistence") xtitle("Year") ///
	graphregion(fcolor(white) color(white)) ///
	legend(order(1 3) label(1 "Sons") label(3 "Daughters")) ///
	yscale(range(0 0.45)) ylabel(0(0.05)0.45) ///
	xscale(range(1985 2020)) xlabel(1985(5)2020) ///
	saving("${us_results}/fig4b", replace)
* Export the graph in both formats.
graph export "${tabfig}/fig4d.pdf", replace
graph export "${tabfig}/fig4d.eps", replace




* Close the log opened near the top of the script, then drop the data in memory.
log close
clear
